/* Simulations of reallocating panels at the country (interstate) level.
Objective: maximize energy+env value.

NOTES: All directories and paths should be set in the "Reallocation_master.do" file.

*/

clear all

import delimited `"${data_path}/Generation_and_damages_withCO2_2019-11-24_eGrid_SR.csv"', case(preserve) 
rename zip zipcode
drop num_systems

merge 1:1 zipcode using `"${data_intermediate}/NumberSystems.dta"'
*Note: We have 1478 zipcodes with solar panels but no damage calculations.
*So I assign damages to these zipcodes based on the damages from the previous zipcode (when zipcodes are listed in numerical order)
replace total_damages = total_damages[_n-1] if _merge==2
drop _merge

merge 1:1 zipcode using `"${data_path}/HousingUnits"'
*need to drop zipcodes with missing housing units or zip codes that have 0 housing units
*Note: There are 121 zip codes with a positive number of installations but 0 housing units
*This can be observed by uncommenting the next line.
*count if single_units ==0 & num_systems>0 & !missing(num_systems)
drop if missing(single_units) | single_units==0

rename _merge _merge_housing

*fill in missing state variable with ZipCodeToState file
merge 1:1 zipcode using `"${data_path}/ZipCodesToState"'
replace state = stateabbreviation if missing(state)

*label the Puerto Rico zipcodes
replace state="PR" if missing(state) & zipcode>600 & zipcode<1000

*drop Puerto Rico, Hawaii, and Alaska
drop if state=="HI" | state=="AK" | state=="PR"
*drop zips if we have no data for them
drop if _merge==2
drop _merge

rename total_damages total_env_damages

gen total_damages = total_env_damages+ genval_severin
*Note: We have 2177 zipcodes with single occupancy housing units but no damage calculations.
*So I assign damages to these zipcodes based on the damages from the previous zipcode (when zipcodes are listed in numerical order)
replace total_damages = total_damages[_n-1] if missing(total_damages)

drop single_units_MOE _merge_housing

*generate total damages (damages per 4kW installation * number of installations)
gen total_damages_all = total_damages*num_systems

sort total_damages single_units
egen rank=rank(-total_damages)
sort rank single_units
drop rank
gen rank=_n /*ranks zipcodes by total_damages and number of houses*/


* Simulation 1 (moving all panels to highest env value zipcode subject to #of housing units)
gen new_num_systems=0

*sum rank 
*scalar minrank=r(min)
*scalar maxrank=r(max)
egen sumsystems = sum(num_systems)
gen remaining = sumsystems
sum rank, meanonly
local maxindex=r(max)
forvalues i=1/`maxindex' {
	quietly replace new_num_systems=min(remaining[`i'],single_units[`i']) if rank==`i' & remaining[`i']>0
	quietly drop remaining
	quietly egen sumNew=sum(new_num_systems)
	quietly gen remaining=sumsystems-sumNew
	quietly drop sumNew
}

gen newTotalDamages=new_num_systems*total_damages
*order rank zipcode state num_systems new1_num_systems single_units avoided_MD_perkW avoided_MD new1_avoided_MD size_kW actualsize_kW single_units_MOE _merge

* check
egen sumNewSystems=sum(new_num_systems)
egen SumOldTotalDamages=sum(total_damages_all)
egen SumNewTotalDamages=sum(newTotalDamages)

drop remaining
* Simulation 2 (moving all panels to highest env value zipcode subject to 30% x #of housing units)
gen single_units30 = floor(0.3*single_units)
*order rank zipcode state num_systems new1_num_systems single_units single_units15 avoided_MD_perkW avoided_MD new1_avoided_MD size_kW actualsize_kW single_units_MOE _merge
gen new_num_systems30=0
gen remaining = sumsystems
sum rank, meanonly
local maxindex=r(max)
forvalues i=1/`maxindex' {
	quietly replace new_num_systems30=min(remaining[`i'],single_units30[`i']) if rank==`i' & remaining[`i']>0
	quietly drop remaining
	quietly egen sumNew30=sum(new_num_systems30)
	quietly gen remaining=sumsystems-sumNew30
	quietly drop sumNew30
}

gen newTotalDamages30=new_num_systems30*total_damages
*order rank zipcode state num_systems new1_num_systems new2_num_systems single_units single_units15 avoided_MD_perkW avoided_MD new1_avoided_MD new2_avoided_MD size_kW actualsize_kW single_units_MOE _merge

* check
egen sumNewSystems30=sum(new_num_systems30)
egen SumNewTotalDamages30=sum(newTotalDamages30)

*Calculate percentage gains
gen gain = (SumNewTotalDamages - SumOldTotalDamages)/SumOldTotalDamages
gen gain30 = (SumNewTotalDamages30 - SumOldTotalDamages)/SumOldTotalDamages


*Count the number of zip codes with solar installations under the reallocation scenarios
count if !missing(num_systems) /*original*/
count if new_num_systems>0 /*unconstrained*/
count if new_num_systems30>0 /*constrained to 30% of housing units*/


*Calculate mean avoided damages across zip codes with installations per avg system
mean total_damages if !missing(num_systems) /*original*/
mean total_damages if new_num_systems>0 /*unconstrained*/
mean total_damages if new_num_systems30>0 /*constrained to 30%*/

save `"${data_intermediate}/Simulation_Results_Country_energy_plus_env"', replace
outsheet zipcode NAME state POPULAT POP_SQM pcac INT total_damages num_systems new_num_systems new_num_systems30 using `"${data_intermediate}/Simulation_Results_Country_energy_plus_env.csv"', comma replace
